pest_data <- readRDS('pest_data.RDS')
str(pest_data)
N_buildings <- length(unique(pest_data$building_id))
N_buildings
# preliminary plots
ggplot(pest_data, aes(x = complaints)) +
geom_bar()
library (arm)
library(R2WinBUGS)
library(rstanarm)
library(ggplot2)
theme_set(theme_minimal())  # customize the ggplot2 setting, not mandatory
library(bayesplot)
library(rstan)
library(invgamma)
library(gridExtra)
library(tidyr)
library(rprojroot)
library (foreign)
library(loo)
pest_data <- readRDS('pest_data.RDS')
str(pest_data)
N_buildings <- length(unique(pest_data$building_id))
N_buildings
# preliminary plots
ggplot(pest_data, aes(x = complaints)) +
geom_bar()
ggplot(pest_data, aes(x = traps, y = complaints, color = live_in_super == TRUE)) +
geom_jitter()
complaints <- pest_data$complaints
traps <- pest_data$traps
stan_dat_simple <- list(
N = nrow(pest_data),
complaints = pest_data$complaints,
traps = pest_data$traps
)
comp_model_P <- stan_model('simple_poisson_regression.stan')
library (arm)
library(R2WinBUGS)
library(rstanarm)
library(ggplot2)
theme_set(theme_minimal())  # customize the ggplot2 setting, not mandatory
library(bayesplot)
library(rstan)
library(invgamma)
library(gridExtra)
library(tidyr)
library(rprojroot)
library (foreign)
library(loo)
srrs2 <- read.table ("srrs2.dat", header=T, sep=",")
mn <- srrs2$state=="MN"
radon <- srrs2$activity[mn]
log.radon <- log (ifelse (radon==0, .1, radon))
floor <- srrs2$floor[mn]       # 0 for basement, 1 for first floor
n <- length(radon)
y <- log.radon
x <- floor
# get county index variable
county.name <- as.vector(srrs2$county[mn])
uniq <- unique(county.name)
# get county index variable
county.name <- as.vector(srrs2$county[mn])
uniq <- unique(county.name)
J <- length(uniq)
county <- rep (NA, J)
for (i in 1:J){
county[county.name==uniq[i]] <- i
}
# no predictors
ybarbar = mean(y)
sample.size <- as.vector (table (county))
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
cty.mns = tapply(y,county,mean)
cty.vars = tapply(y,county,var)
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
help(stan_lmer)
library (arm)
library(R2WinBUGS)
library(rstanarm)
library(ggplot2)
theme_set(theme_minimal())  # customize the ggplot2 setting, not mandatory
library(bayesplot)
library(rstan)
library(invgamma)
library(gridExtra)
library(tidyr)
library(rprojroot)
library (foreign)
library(loo)
srrs2 <- read.table ("srrs2.dat", header=T, sep=",")
mn <- srrs2$state=="MN"
radon <- srrs2$activity[mn]
log.radon <- log (ifelse (radon==0, .1, radon))
floor <- srrs2$floor[mn]       # 0 for basement, 1 for first floor
n <- length(radon)
y <- log.radon
x <- floor
# get county index variable
county.name <- as.vector(srrs2$county[mn])
uniq <- unique(county.name)
J <- length(uniq)
county <- rep (NA, J)
for (i in 1:J){
county[county.name==uniq[i]] <- i
}
# no predictors
ybarbar = mean(y)
sample.size <- as.vector (table (county))
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
cty.mns = tapply(y,county,mean)
cty.vars = tapply(y,county,var)
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
mlm.radon.nopred <- stan_lmer(y ~ 1+ (1|county))
library(lme4)
mlm.radon.nopred.2 <- lmer(y ~ 1+ (1|county)) # frequentist fit
print(mlm.radon.nopred)
display(mlm.radon.nopred.2)
par(mfrow=c(1,2))
plot (sample.size.jittered, cty.mns, cex.lab=1.6, cex.axis=1,
xlab="sample size in county j",
ylab="avg. log radon in county j",
pch=20, log="x", cex=.8, mgp=c(1.5,.5,0),
ylim=c(0,3.2), yaxt="n", xaxt="n", cex.main = 1.8)
axis (1, c(1,3,10,30,100), cex.axis=.9, mgp=c(1.5,.5,0))
axis (2, seq(0,3), cex.axis=.9, mgp=c(1.5,.5,0))
for (j in 1:J){
lines (rep(sample.size.jittered[j],2),
cty.mns[j] + c(-1,1)*cty.sds[j], lwd=.5)
#         cty.mns[j] + c(-1,1)*mean(cty.sds[!is.na(cty.sds)]), lwd=.5)
}
abline(h=mlm.radon.nopred$coefficients[1])
title("No pooling",cex.main=1.8, line=1)
#abline(h=ybarbar)
points(sample.size.jittered[36],cty.mns[36],cex=4)
plot (sample.size.jittered,
mlm.radon.nopred$coefficients[1]+ mlm.radon.nopred$coefficients[2:86],
cex.lab=1.6, cex.axis=1,
xlab="sample size in county j",
ylab="avg. log radon in county j",
pch=20, log="x", cex=.8, mgp=c(1.5,.5,0),
ylim=c(0,3.2), yaxt="n", xaxt="n")
axis (1, c(1,3,10,30,100), cex.axis=.9, mgp=c(1.5,.5,0))
axis (2, seq(0,3), cex.axis=.9, mgp=c(1.5,.5,0))
for (j in 1:J){
lines (rep(sample.size.jittered[j],2),
mlm.radon.nopred$coefficients[1]+mlm.radon.nopred$coefficients[j+1] + c(-1,1)*mlm.radon.nopred$ses[j+1],
lwd=.5)
}
abline(h=mlm.radon.nopred$coefficients[1])
points(sample.size.jittered[36],
mlm.radon.nopred$coefficients[1]+mlm.radon.nopred$coefficients[37],cex=4)#,col="red")
title("Multilevel model",cex.main=1.8, line=1)
ranef(mlm.radon.pred)
mlm.radon.pred <- stan_lmer(y ~ x+ (1|county))
print(mlm.radon.pred)
ranef(mlm.radon.pred)
fixef(mlm.radon.pred)
mlm.radon.pred <- stan_lmer(y ~ x+ (1|county))
print(mlm.radon.pred)
ranef(mlm.radon.pred)
fixef(mlm.radon.pred)
a.hat <- coefficients(mlm.radon.pred)$county[,1]
b.hat <- coefficients(mlm.radon.pred)$county[,2]
x.jitter <- x + runif(n,-.05,.05)
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70)  # counties to be displayed
y.range <- range (y[!is.na(match(county,display8))])
par (mfrow=c(2,4), mar=c(3,2,3,1), oma=c(1,1,2,1))
for (j in display8){
plot (x.jitter[county==j], y[county==j], xlim=c(-.05,1.05), ylim=y.range,
xlab="floor", ylab="log radon level", cex.lab=1.8, cex.axis=1.1,
pch=20, mgp=c(2,.7,0), xaxt="n", yaxt="n", cex.main=1.6,
main=uniq[j], lwd=1.5)
axis (1, c(0,1), mgp=c(2,.7,0), cex.axis=1.1)
axis (2, seq(-1,3,2), mgp=c(2,.7,0), cex.axis=1.1)
curve (coef(lm.pooled)[1] + coef(lm.pooled)[2]*x, lwd=3, lty=2, add=TRUE)
curve (coef(lm.unpooled)[j+1] + coef(lm.unpooled)[1]*x, lwd=3, add=TRUE)
curve (a.hat[j] + b.hat[j]*x,  col="red", lwd =3, add=TRUE)
}
# Complete pooling regression
lm.pooled <- lm (y ~ x)
display (lm.pooled)
# No pooling regression
lm.unpooled <- lm (y ~ x + factor(county) -1)
display (lm.unpooled)
x.jitter <- x + runif(n,-.05,.05)
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70)  # counties to be displayed
y.range <- range (y[!is.na(match(county,display8))])
par (mfrow=c(2,4), mar=c(3,2,3,1), oma=c(1,1,2,1))
for (j in display8){
plot (x.jitter[county==j], y[county==j], xlim=c(-.05,1.05), ylim=y.range,
xlab="floor", ylab="log radon level", cex.lab=1.8, cex.axis=1.1,
pch=20, mgp=c(2,.7,0), xaxt="n", yaxt="n", cex.main=1.6,
main=uniq[j], lwd=1.5)
axis (1, c(0,1), mgp=c(2,.7,0), cex.axis=1.1)
axis (2, seq(-1,3,2), mgp=c(2,.7,0), cex.axis=1.1)
curve (coef(lm.pooled)[1] + coef(lm.pooled)[2]*x, lwd=3, lty=2, add=TRUE)
curve (coef(lm.unpooled)[j+1] + coef(lm.unpooled)[1]*x, lwd=3, add=TRUE)
}
x.jitter <- x + runif(n,-.05,.05)
display8 <- c (36, 1, 35, 21, 14, 71, 61, 70)  # counties to be displayed
y.range <- range (y[!is.na(match(county,display8))])
par (mfrow=c(2,4), mar=c(3,2,3,1), oma=c(1,1,2,1))
for (j in display8){
plot (x.jitter[county==j], y[county==j], xlim=c(-.05,1.05), ylim=y.range,
xlab="floor", ylab="log radon level", cex.lab=1.8, cex.axis=1.1,
pch=20, mgp=c(2,.7,0), xaxt="n", yaxt="n", cex.main=1.6,
main=uniq[j], lwd=1.5)
axis (1, c(0,1), mgp=c(2,.7,0), cex.axis=1.1)
axis (2, seq(-1,3,2), mgp=c(2,.7,0), cex.axis=1.1)
curve (coef(lm.pooled)[1] + coef(lm.pooled)[2]*x, lwd=3, lty=2, add=TRUE)
curve (coef(lm.unpooled)[j+1] + coef(lm.unpooled)[1]*x, lwd=3, add=TRUE)
curve (a.hat[j] + b.hat[j]*x,  col="red", lwd =3, add=TRUE)
}
library (arm)
library(R2WinBUGS)
library(rstanarm)
library(ggplot2)
theme_set(theme_minimal())  # customize the ggplot2 setting, not mandatory
library(bayesplot)
library(rstan)
library(invgamma)
library(gridExtra)
library(tidyr)
library(rprojroot)
library (foreign)
library(loo)
library(lme4)
srrs2 <- read.table ("srrs2.dat", header=T, sep=",")
mn <- srrs2$state=="MN"
radon <- srrs2$activity[mn]
log.radon <- log (ifelse (radon==0, .1, radon))
floor <- srrs2$floor[mn]       # 0 for basement, 1 for first floor
n <- length(radon)
y <- log.radon
x <- floor
## Partial pooling with no predictors
# get county index variable
county.name <- as.vector(srrs2$county[mn])
uniq <- unique(county.name)
J <- length(uniq)
county <- rep (NA, J)
for (i in 1:J){
county[county.name==uniq[i]] <- i
}
# no predictors
ybarbar = mean(y)
sample.size <- as.vector (table (county))
sample.size.jittered <- sample.size*exp (runif (J, -.1, .1))
cty.mns = tapply(y,county,mean)
cty.vars = tapply(y,county,var)
cty.sds = mean(sqrt(cty.vars[!is.na(cty.vars)]))/sqrt(sample.size)
cty.sds.sep = sqrt(tapply(y,county,var)/sample.size)
# varying-intercept model, no predictors
mlm.radon.nopred <- stan_lmer(y ~ 1+ (1|county)) # bayesian fit
mlm.radon.nopred.2 <- lmer(y ~ 1+ (1|county)) # frequentist fit
print(mlm.radon.nopred)
display(mlm.radon.nopred.2)
# Complete pooling regression
lm.pooled <- lm (y ~ x)
display (lm.pooled)
# No pooling regression
lm.unpooled <- lm (y ~ x + factor(county) -1)
display (lm.unpooled)
mlm.radon.pred <- stan_lmer(y ~ x+ (1|county))
print(mlm.radon.pred)
mlm.radon.pred.2 <- lmer(y ~ x+ (1|county))
display(mlm.radon.nopred.2)
mlm.radon.pred.2 <- lmer(y ~ x+ (1|county))
display(mlm.radon.pred.2)
mlm.radon.pred$stan_summary
mlm.radon.pred@stanmodel
mlm.radon.pred$coefficients
mlm.radon.pred$fitted.values
mlm.radon.pred$model
mlm.radon.pred$formula
mlm.radon.pred$stanfit
mlm.radon.pred$stan_function
mlm.radon.pred$prior.info
mlm.radon.pred$stanfit@stanmodel
y <- c(28,8,-3,7,-1,1,18,12)
s <- c(15,10,16,11,9,11,10,18)
x <- seq(-40, 60, length.out = 500)
df_sep <- mapply(function(y, s, x) dnorm(x, y, s), y, s, MoreArgs = list(x = x)) %>%
as.data.frame() %>% setNames(LETTERS[1:8]) %>% cbind(x) %>% gather(school, p, -x)
labs1 <- c('Other Schools', 'School A')
plot_sep <- ggplot(data = df_sep) +
geom_line(aes(x = x, y = p, color = (school=='A'), group = school)) +
labs(x = 'Treatment effect', y = '', title = 'Separate model', color = '', size =rel(2)) +
scale_y_continuous(breaks = NULL) +
scale_color_manual(values = c('blue','red'), labels = labs1) +
theme(legend.background = element_blank(), legend.position = c(0.8,0.9))+
theme(plot.title = element_text(hjust = 0.5, size =rel(2)),
axis.title=element_text(size=22))+
xaxis_text(on =TRUE, size=rel(1.9))+
yaxis_text(on =TRUE, size=rel(1.9))
plot_sep
